library(tidyverse)
## -- Attaching packages ----------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts -------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Read the tab-separated genotype file; the first row holds the column names.
SNPs <- read.table("23andMe_complete.txt", header = TRUE, sep = "\t")

### Exercise 1
# Turn chromosome into an ordered factor so the x axis runs 1..22, X, Y, MT
# instead of following alphabetical order.
SNPs$chromosome <- factor(
  SNPs$chromosome,
  levels = c(1:22, "X", "Y", "MT"),
  ordered = TRUE
)

# Bar chart of the number of rows (SNPs) recorded for each chromosome.
ggplot(data = SNPs, mapping = aes(x = chromosome)) +
  geom_bar(color = "blue", fill = "blue") +
  labs(title = "Total SNPs for each Chromosome")

### Exercise 2
# Fill colour per genotype code: two-letter base calls are red, single-letter
# base calls are blue, and the D/I codes plus "--" are green.
genotype_palette <- setNames(
  rep(c("red", "blue", "green"), times = c(10, 4, 6)),
  c(
    "AA", "CC", "GG", "TT", "AC", "AG", "AT", "CG", "CT", "GT",
    "A", "C", "G", "T",
    "D", "DD", "DI", "II", "I", "--"
  )
)

# Stacked bar chart: SNP count per chromosome, split by genotype.
ggplot(data = SNPs, mapping = aes(x = chromosome, fill = genotype)) +
  geom_bar() +
  scale_fill_manual(values = genotype_palette) +
  labs(
    title = "Total Number of SNPs for each Chromosome",
    x = "Chromosome Number",
    y = "Number of SNPs"
  )

### Exercise 3
# Fill colour per genotype code: two-letter base calls are red, single-letter
# base calls are blue, and the D/I codes plus "--" are green.
genotype_palette <- c(
  "AA" = "red", "CC" = "red", "GG" = "red", "TT" = "red", "AC" = "red",
  "AG" = "red", "AT" = "red", "CG" = "red", "CT" = "red", "GT" = "red",
  "A" = "blue", "C" = "blue", "G" = "blue", "T" = "blue",
  "D" = "green", "DD" = "green", "DI" = "green", "II" = "green",
  "I" = "green", "--" = "green"
)

# Side-by-side (dodged) bars: SNP count per chromosome for each genotype.
snp_plot <- ggplot(SNPs, aes(chromosome, fill = genotype)) +
  geom_bar(position = "dodge2") +
  ggtitle("Total Number of SNPs for each Chromosome") +
  ylab("Number of SNPs") +
  xlab("Chromosome Number") +
  scale_fill_manual(values = genotype_palette)

# Write the plot to a 6 x 6 inch PNG at 300 pixels per inch.
ppi <- 300
png("SNP_plot.png", width = 6 * ppi, height = 6 * ppi, res = ppi)
# A ggplot object is only drawn when it is printed. Top-level auto-printing
# does not happen under source() or inside functions/loops, which would leave
# the PNG blank, so print explicitly.
print(snp_plot)
dev.off()
## png
## 2
### Exercise 4
# Fill colour per genotype code, grouped by the colour each code receives.
genotype_palette <- c(
  # two-letter base calls
  "AA" = "red", "CC" = "red", "GG" = "red", "TT" = "red", "AC" = "red",
  "AG" = "red", "AT" = "red", "CG" = "red", "CT" = "red", "GT" = "red",
  # single-letter base calls
  "A" = "blue", "C" = "blue", "G" = "blue", "T" = "blue",
  # D/I codes and "--"
  "D" = "green", "DD" = "green", "DI" = "green", "II" = "green",
  "I" = "green", "--" = "green"
)

# One panel per genotype, laid out in two columns; each panel gets its own
# y-axis range so low-count genotypes remain readable.
ggplot(data = SNPs, mapping = aes(x = chromosome, fill = genotype)) +
  geom_bar(position = "dodge2") +
  facet_wrap(facets = ~ genotype, ncol = 2, scales = "free_y") +
  scale_fill_manual(values = genotype_palette) +
  labs(
    title = "Total Number of SNPs for each Chromosome",
    x = "Chromosome Number",
    y = "Number of SNPs"
  )

library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
### Exercise 5
# Fill colour per genotype code: two-letter base calls are red, single-letter
# base calls are blue, and the D/I codes plus "--" are green.
genotype_palette <- c(
  "AA" = "red", "CC" = "red", "GG" = "red", "TT" = "red", "AC" = "red",
  "AG" = "red", "AT" = "red", "CG" = "red", "CT" = "red", "GT" = "red",
  "A" = "blue", "C" = "blue", "G" = "blue", "T" = "blue",
  "D" = "green", "DD" = "green", "DI" = "green", "II" = "green",
  "I" = "green", "--" = "green"
)

# Faceted SNP counts per chromosome, one panel per genotype.
faceted_snp_plot <- ggplot(SNPs, aes(chromosome, fill = genotype)) +
  geom_bar(position = "dodge2") +
  facet_wrap(~genotype, ncol = 2, scales = "free_y") +
  ggtitle("Total Number of SNPs for each Chromosome") +
  ylab("Number of SNPs") +
  xlab("Chromosome Number") +
  scale_fill_manual(values = genotype_palette)

# Convert this specific plot to an interactive widget. Calling ggplotly()
# with no argument *before* building the plot converts whichever plot was
# created last (or fails when there is none), so pass the object explicitly.
ggplotly(faceted_snp_plot)

library(DT)
### Exercise 6
# Interactive table of the SNPs located on the Y chromosome.
# NOTE(review): the original passed "Y Chromosome" as `class`, which only sets
# the table's CSS classes, and showed head(SNPs); the Y-chromosome subset is
# the presumed intent -- confirm against the exercise statement.
# subset() drops rows where the comparison is NA. droplevels() discards unused
# factor levels, which otherwise travel with the subset and inflate the data
# sent to the browser.
y_chromosome_snps <- droplevels(subset(SNPs, chromosome == "Y"))
datatable(y_chromosome_snps, caption = "Y Chromosome")
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html